* moved the folder of raw data /thecb/thecb_07-01/ to /projects/col_remediation/

* Session setup: no paging, close any open log, empty memory, drop old programs
set more off
cap log close
clear
program drop _all

* From here on every command ends with a semicolon instead of a newline
#delimit;

* Quote the log path (like every other path in this file) so a directory name with spaces still works;
log using "${d1}log/process_report9.log", replace;
/* ---------------------------------------------------------------------------------------
First, read in the new grad files
---------------------------------------------------------------------------------------- */;

* Enter raw 2004/5 college graduation data;

/* Read the four fixed-width graduation files (years 4 and 5, sr and jr),
   recode sex, ethnicity and birth date, and save each one as
   report9<type>20<y+1>_full.  The column layout differs by year and type,
   so each file has its own infix dictionary. */;
forvalues y=4/5 {;
 foreach type in sr jr {;

  if `y'==4 & "`type'"=="sr" {;
	infix 
	byte	rectype	1 /* should always be 9 */
	long	school 2-7
	str9	altpid 8-16
	str1	sex 17
	byte	ethnic 18
	long	birthdat 19-24
	str8	degree 25-32
	byte	level 33
	long	major 34-41
	byte	period 42  /* always 1 */
	int	year 43-46
	str1	update 47
	using "${d1}thecb/thecb_07-01/D9SR104.", clear;
  };

  if `y'==5 & "`type'"=="sr" {;

	infix 
	byte	rectype	1 /* should always be 9 */
	long	school 2-7
	str9	altpid 8-16
	str1	sex 17
	byte	ethnic 18
	long	birthdat 19-24
	str8	degree 27-34
	byte	level 35
	long	major 36-43
	byte	period 44  /* always 1 */
	int	year 45-48
	str1	update 49
	long	maward 50-51
 
	using "${d1}thecb/thecb_07-01/D9SR105.", clear;
  };

  if `y'==4 & "`type'"=="jr" {;

	infix 
	byte	rectype	1 /* should always be 9 */
	long	school 2-7
	str9	altpid 8-16
	str1	sex 17
	byte	ethnic 18
	long	birthdat 19-24
	str8	degree 25-32
	byte	level 33
	long	major 34-41
	byte	period 42  /* always 1 */
	int	year 43-46
	long	maward 47-48

    using "${d1}thecb/thecb_07-01/D9JR104.", clear;
  };

  if `y'==5 & "`type'"=="jr" {;

	infix 
	byte	rectype	1 /* should always be 9 */
	long	school 2-7
	str9	altpid 8-16
	str1	sex 17
	byte	ethnic 18
	long	birthdat 19-24
	str8	degree 27-34
	byte	level 35
	long	major 36-43
	byte	period 44  /* always 1 */
	int	year 45-48
	long	maward 49-50 /* was 48-49, which overlapped the last digit of year; 49-50 follows year as in the year-4 jr layout -- NOTE(review): confirm against the THECB record layout */

	using "${d1}thecb/thecb_07-01/D9JR105.", clear;
  };

	/* sex becomes numeric: 1 when the raw code is M, 0 for anything else */;
	gen nsex=1*(sex=="M");
	drop sex;
	rename nsex sex;

	/* ethnicity recode 1->5, 2->3, 3->4, 4->2, 5->1, 6->6, 7->7; any other raw value becomes 0 */;
	gen newethnic=5*(ethnic==1)+3*(ethnic==2)+4*(ethnic==3)+2*(ethnic==4)+1*(ethnic==5)+6*(ethnic==6)+7*(ethnic==7);

	/* birth date is read as a 6-digit number (characters 1-4 used as year, 5-6 as month);
	   turn it into a Stata date on day 15 of that month.
	   The helper strings y and m are left in the saved file, as before. */;
	cap rename birthdat bdate;
	gen str4 y=substr(string(bdate),1,4);
	gen str2 m=substr(string(bdate),5,2);
	gen newbdate=mdy(real(m),15,real(y));
	drop bdate ethnic;
	rename newbdate bdate;
	rename newethnic ethnic;
	confirm var bdate;

   /* output name uses y+1 (205 or 206) so it lines up with cbyear in the processing loop */;
   local yp1=`y'+1;
   qui compress;
   save "${d1}data/highered/report9`type'20`yp1'_full", replace;
 };   /* close type loop */;
};    /* close  year loop */;

/* ---------------------------------------------------------------------------------------
Program to process the grad files
---------------------------------------------------------------------------------------- */;

***
*Programs to clean duplicates and create dummy variables for associate or bachelor degree
***;

*For JC's;
/* clpsjr: collapse the data in memory to one row per altpid.
   Keeps the first record (in original row order) for each altpid and adds
   associate = 1 when any record of that altpid has level==1, otherwise 0.
   Needs variables altpid and level; leaves the helper variable obs
   (original row number) in the data. */;
program define clpsjr;
 /* long instead of the default float: a float cannot represent every row
    number above 16,777,216, which would make the tie-break sort ambiguous */;
 gen long obs=_n;
 sort altpid obs;
 gen ass=level==1;
 /* total() is the documented name of the legacy egen sum() function */;
 egen sumass=total(ass), by(altpid);
 by altpid: keep if _n==1;
 gen associate=sumass>0;
 drop ass sumass;
end;

*For SR College;
/* clpssr: collapse the data in memory to one row per altpid.
   Keeps the first record (in original row order) for each altpid and adds
   associate     = 1 when any record of that altpid has level==1, else 0
   baccalaureate = 1 when any record of that altpid has level==2, else 0.
   Needs variables altpid and level; leaves the helper variable obs
   (original row number) in the data. */;
program define clpssr;
 /* long instead of the default float: a float cannot represent every row
    number above 16,777,216, which would make the tie-break sort ambiguous */;
 gen long obs=_n;
 sort altpid obs;
 gen ass=level==1;
 gen bac=level==2;
 /* total() is the documented name of the legacy egen sum() function */;
 egen sumass=total(ass), by(altpid);
 egen sumbac=total(bac), by(altpid);
 by altpid: keep if _n==1;
 gen associate=sumass>0;
 gen baccalaureate=sumbac>0;
 drop ass bac sumass sumbac;
end;

/* ---------------------------------------------------------------------------------------
Run the program to process the graduation files
---------------------------------------------------------------------------------------- */;

**** Run the programs to process the college graduation files;
/* For every cbyear 192-206: load the senior file, collapse it with clpssr,
   standardise variable names and save it as report9sr<cbyear-1>; then do the
   same for the junior file with clpsjr.  Years before 205 come from the
   hb_data archive; 205 and 206 come from the _full files built above. */;
forvalues cbyear=192/206 {;
 local tspyear=`cbyear'-1;

 if `cbyear'<205 {;
  qui use "${hb_data}HigherEd/report9/senior/D9sr`cbyear'_1.dta", clear;
 };
 else {;
  /* clear added to match the branch above, so the load never depends on
     the data in memory having just been saved */;
  qui use "${d1}data/highered/report9sr`cbyear'_full", clear;

 };
 disp "Processing data for SR, Tspyear=`tspyear'";
 count;
 clpssr;
 /* older files call the birth date birthdat; make sure it ends up as bthday */;
 cap rename birthdat bdate;
 confirm var bdate;
 rename bdate bthday;
 rename ethnic ethnic_t;
 qui compress;
 summ;
 save "${d1}data/highered/report9sr`tspyear'", replace;

 if `cbyear'<205 {;
  qui use "${hb_data}HigherEd/report9/junior/D9jr`cbyear'_1.dta", clear;
 };
 else {;
  /* clear added for the same reason as in the senior branch */;
  qui use "${d1}data/highered/report9jr`cbyear'_full", clear;
 };
 disp "Processing data for JR, Tspyear=`tspyear'";
 count;
 clpsjr;
 cap rename birthdat bdate;
 confirm var bdate;
 rename bdate bthday;
 if `tspyear'==202 | `tspyear'==203 {;    /* change format for birthday variable in 202 and 203 */;
   /* bthday is treated as a string here: characters 1-4 are the year and
      5-6 the month; rebuild it as a Stata date on day 15 of that month */;
   qui {;
   gen yr_s=substr(bthday,1,4);
   gen m_s=substr(bthday,5,2);
   gen dob_r=mdy(real(m_s),15,real(yr_s));
   drop bthday;
   rename dob_r bthday;
   format bthday %dN/D/Y;
   drop yr_s m_s;
   };
 };
 rename ethnic ethnic_t;
 qui compress;
 summ;
 save "${d1}data/highered/report9jr`tspyear'", replace;

};




log close;

